#nbi:hide_in
from IPython.display import display, Markdown, clear_output
from ipywidgets import interact, ButtonStyle
import ipywidgets as widgets

import os
import pandas as pd
import numpy as np
#nbi:hide_in
# Header artwork: "mudah.png", referenced through a relative <img> path.
gmbr_mudah = widgets.HTML(
    description='',
    placeholder='',
    value='<img src="mudah.png">',
)
#nbi:hide_in
# Header artwork: "Suspicous Ads.png", referenced through a relative <img> path.
gmbr_scam = widgets.HTML(
    description='',
    placeholder='',
    value='<img src="Suspicous Ads.png">',
)
#nbi:hide_in
# Light-blue "Help" button placed in the header column.
helpbtn = widgets.Button(
    description='Help',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    style=ButtonStyle(button_color='lightblue'),
)
#nbi:hide_in
# Light-blue "About Us" button placed in the header column.
aboutbtn = widgets.Button(
    description='About Us',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    style=ButtonStyle(button_color='lightblue'),
)
#aboutbtn
#nbi:hide_in
# Tagline text. Each bold span is closed with </b>; the original reopened
# <b> instead, which left the remainder of the markup bold.
str_header1 = widgets.HTML(
    value="Beware of <b>SCAMMER</b>, buy from the <b>TRUSTED</b> one",
    placeholder='',
    description='',

)
#str_header1
#nbi:hide_in
# Application title text, rendered in bold.
str_scammer = widgets.HTML(
    value="<b>Scammer Detector</b>",
    placeholder='',
    description='',

)
#nbi:hide_in
#header = widgets.VBox([widgets.HBox([logo_mudah, str_scammer, helpbtn, aboutbtn]), str_header1])
# Header row: both banner images side by side, followed by a column holding
# the Help and About Us buttons.
header = widgets.HBox([
    gmbr_mudah,
    gmbr_scam,
    widgets.VBox([helpbtn, aboutbtn]),
])
#nbi:hide_in
# Free-text box for a mudah.my listing link.
meurl = widgets.Text(
    description='Link:',
    placeholder='Paste your mudah.my link here',
    value='',
    disabled=False,
)
#nbi:hide_in
# Car models offered in the dropdown; the first entry is preselected.
type_car = [
    'Perodua Kancil',
    'Perodua Myvi',
    'Honda Accord',
    'Honda Civic',
    'Toyota Vios',
]
car_dropdown = widgets.Dropdown(
    description='Type of Car:',
    options=type_car,
    value=type_car[0],
    disabled=False,
)
#nbi:hide_in
# Locations offered in the dropdown; the first entry is preselected.
list_location = ['Selangor', 'Kuala Lumpur']
location_dropdown = widgets.Dropdown(
    description='Location:',
    options=list_location,
    value=list_location[0],
    disabled=False,
)
#nbi:hide_in
# Page counts '1' .. '10', kept as strings; the first entry is preselected.
pageslist = [str(page_no) for page_no in range(1, 11)]
pages_dropdown = widgets.Dropdown(
    description='Num Pages:',
    options=pageslist,
    value=pageslist[0],
    disabled=False,
)
#nbi:hide_in
# The three scrape parameters laid out on one row.
scrap_param = widgets.HBox([car_dropdown, location_dropdown, pages_dropdown])
#nbi:hide_in
# Folder in which the scraped CSV should be saved.
fpath = widgets.Text(
    description='Folder Path:',
    placeholder='Save Scraped Dataset',
    value='',
    disabled=False,
)
#nbi:hide_in
# Base name (without extension) of the scraped CSV file.
fname = widgets.Text(
    description='CSV file:',
    placeholder='Name your scrap csv file',
    value='',
    disabled=False,
)
#fnamea
# Destination of the scraped CSV: "<Folder Path>/<CSV file>.csv".
# NOTE: both widget values are read once, when these lines execute, so the
# result does not follow later edits made in the text boxes.
save_csvname = fname.value + '.csv'
# Fix: the original joined with the undefined name `csvname` (NameError).
save_folderpath = os.path.join(fpath.value, save_csvname)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-098e3ad2ea47> in <module>()
      1 save_csvname = fname.value + '.csv'
----> 2 save_folderpath = os.path.join(fpath.value, csvname)

NameError: name 'csvname' is not defined
save_folderpath
#nbi:hide_in
# Folder that holds the cleaned dataset to load.
lpath = widgets.Text(
    description='Folder Path:',
    placeholder='Upload cleaned dataset',
    value='',
    disabled=False,
)
#nbi:hide_in
# Base name (without extension) of the cleaned CSV file.
lname = widgets.Text(
    description='CSV file:',
    placeholder='Name of cleaned csv file',
    value='',
    disabled=False,
)
#lname
# Location of the cleaned CSV: "<Folder Path>/<CSV file>.csv", taken from the
# "cleaned dataset" widgets (lpath / lname).
# Fix: the original read the scrape widgets (fname / fpath) and joined with
# the undefined name `csvname`.
# NOTE: the widget values are read once, when these lines execute.
clean_csvname = lname.value + '.csv'
clean_folderpath = os.path.join(lpath.value, clean_csvname)
clean_folderpath
#nbi:hide_in
# Example values displayed next to the input boxes. Every snippet now closes
# its italic span with </i>; the original reopened <i> instead.
str_link= widgets.HTML(
    value="<i><b>https://www.mudah.my/malaysia/cars-for-sale?o=1&q=&th=1</b></i>",
    placeholder='',
    description='Sample Link:',
)
#str_link
#nbi:hide_in
str_folder= widgets.HTML(
    value="<i><b>C:\\Users\\LENOVO\\Documents</b></i>",
    placeholder='',
    description='Sample Path:',
)
#str_folder
#nbi:hide_in
str_csv= widgets.HTML(
    value="<i><b>kereta</b></i>",
    placeholder='',
    description='Sample CSV:',
)
#str_csv
#nbi:hide_in
# Caption above the preview controls.
str_view = widgets.HTML(
    description='',
    placeholder='',
    value="<b>Choose how to view your data</b>",
)
#nbi:hide_in
# Row counts '5' .. '10', kept as strings; the first entry is preselected.
viewlist_ = [str(row_count) for row_count in range(5, 11)]
view_dropdown = widgets.Dropdown(
    description='Num Rows:',
    options=viewlist_,
    value=viewlist_[0],
    disabled=False,
)
#nbi:hide_in
# Preview modes; view() compares against these entries by position.
view_by = ['Head', 'Random', 'Tail']
viewby_dropdown = widgets.Dropdown(
    description='By:',
    options=view_by,
    value=view_by[0],
    disabled=False,
)
#nbi:hide_in
# Button that triggers the preview.
viewby_btn = widgets.Button(
    description='View',
    tooltip='Click Me',
    icon='',
    button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)
#viewby_btn
#nbi:hide_in
#import cleaned datafile
# C:\Users\LENOVO\Documents\fyp fathiah\Program\Test Case\dataset
# Switch the working directory so the relative CSV name below resolves.
# NOTE: the path is an absolute Windows path specific to one machine.
os.chdir(r"C:\Users\LENOVO\Documents\fyp fathiah\Program\Test Case\dataset")
# Cell contents that pandas should read as NaN. "0" is in the list, so cells
# holding a literal 0 are loaded as missing values.
missing_values = ["n/a", "na", "-", "0"]
#filepath_or_buffer = "ToyotaVios KL & SELANGORNUMCOL.csv"
filepath_or_buffer = "HondaAccord KL & SELANGOR.csv"
# Module-level DataFrame that view(), data_label() and the PCA steps read.
df = pd.read_csv(filepath_or_buffer, na_values = missing_values)
#nbi:hide_in
def view(obj, n):
    """Print the chosen mode and display ``n`` rows of the module-level ``df``.

    ``obj`` is compared with the entries of ``view_by``: the first entry shows
    the head, the second a random sample, the third the tail. Any other value
    prints and displays nothing.
    """
    if obj == view_by[0]:
        pick_rows = df.head
    elif obj == view_by[1]:
        pick_rows = df.sample
    elif obj == view_by[2]:
        pick_rows = df.tail
    else:
        return

    print(obj)
    display(pick_rows(n))
#nbi:hide_in
#bscrap = widgets.Button(style=ButtonStyle(button_color='orange'), description='Srap & Save')
# Output area that receives the preview table.
view_output = widgets.Output()


def on_button_clicked(_):
    """Redraw the preview inside ``view_output`` from the current dropdown values."""
    with view_output:
        # Drop the previous preview before drawing the new one.
        clear_output()
        mode = viewby_dropdown.value
        row_count = int(view_dropdown.value)
        view(mode, row_count)


# Register the handler on the "View" button.
viewby_btn.on_click(on_button_clicked)
#nbi:hide_in
# Dropdowns on one row, with the button and its output stacked underneath.
view_by_header = widgets.VBox([
    widgets.HBox([viewby_dropdown, view_dropdown]),
    widgets.VBox([viewby_btn, view_output]),
])
#view_by_header
view(viewby_dropdown.value, int(view_dropdown.value))
Random
Price Manufactured Year NewMil
70 19500.0 2007.0 22500.0
46 30000.0 2009.0 82500.0
0 65000.0 2017.0 52500.0
63 28700.0 2009.0 155000.0
14 45000.0 2014.0 125000.0
18 35000.0 2010.0 125000.0
51 38500.0 2012.0 165000.0
df.sample(5)
Price Manufactured Year NewMil
53 49000.0 2014.0 57500.0
20 31588.0 2010.0 155000.0
42 46500.0 2014.0 57500.0
58 17500.0 2005.0 165000.0
66 17999.0 2005.0 155000.0
#nbi:hide_in
# Captions for the three evaluation panels. Each bold span is closed with
# </b>; the original reopened <b> instead.
str_pca= widgets.HTML(
    value="Apply <b>Data Mining</b> Method!!",
    placeholder='',
    description='',
)
#str_pca
#nbi:hide_in
str_box= widgets.HTML(
    value="<b>Choose to view and compare by attributes</b>",
    placeholder='',
    description='',
)
#str_box
#nbi:hide_in
str_result= widgets.HTML(
    value="<b>Click to view Potential Scammers!!</b>",
    placeholder='',
    description='',
)
#str_result
#nbi:hide_in
# Orange button that triggers the PCA scatter plot.
pcabtn = widgets.Button(
    style=ButtonStyle(button_color='orange'),
    description='PCA!',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check'
)
#pcabtn
#bscrap = widgets.Button(description='Scrap Me')
def pca_plot(FinalData):
    """Draw the two-component PCA scatter plot inline in the notebook.

    Parameters
    ----------
    FinalData : 2-D array
        Column 0 is plotted on the x axis, column 1 on the y axis.

    Notes
    -----
    Hover text is read from the module-level ``mytexts`` list, which must
    already exist when this function is called.
    """
    # Only the names actually used are imported. The original also imported
    # ``plotly.widgets.GraphWidget`` (plus ``download_plotlyjs`` and ``plot``)
    # without using them; ``plotly.widgets`` is not available in plotly 4+,
    # so that unused import alone made the function fail there.
    import plotly.graph_objs as go
    from plotly.offline import iplot, init_notebook_mode

    init_notebook_mode()
    trace = go.Scatter(
        x=FinalData[:, 0],
        y=FinalData[:, 1],
        mode="markers",
        marker=dict(size=10, line=dict(width=1), color="pink"),
        text=mytexts,
    )
    mylayout = go.Layout(
        title='2 Component PCA: ',
        xaxis=dict(title='component 1'),
        yaxis=dict(title='component 2'),
        hovermode='closest',
        autosize=False,
        width=1000,
        height=1000,
    )
    fig_comp = go.Figure(data=[trace], layout=mylayout)
    iplot(fig_comp)
pca_plot(FinalData)
#nbi:hide_in
#bscrap = widgets.Button(style=ButtonStyle(button_color='orange'), description='Srap & Save')
# Output area that receives the PCA figure.
pca_output = widgets.Output()


def on_button_clicked(_):
    """Redraw the PCA scatter plot inside ``pca_output``."""
    with pca_output:
        # Drop the previous figure before drawing the new one.
        clear_output()
        pca_plot(FinalData)


# Register the handler on the "PCA!" button.
pcabtn.on_click(on_button_clicked)
# Columns set aside in `y`; they are not passed to the PCA steps.
strcol = ['Name', 'Mileage', 'CC', 'Condition', 'Link']
# Columns passed to the PCA steps as `x`.
datacol = ['Price', 'Manufactured Year', 'NewMil']
x = df[datacol]
y = df[strcol]
#r = len(df2) #all rows in d2(cleaned dataset)
#c = len(x.columns) #numcols only
#1.FORMING MATRIX D(RowDataAdjust)
def step1(r, c, x):
    """Return the mean-adjusted data: each value minus its column mean.

    Parameters
    ----------
    r, c : int
        Number of leading rows / columns of ``x`` to include in the result.
    x : pandas.DataFrame
        Numeric data. Each column mean is taken over *all* rows of ``x``
        (pandas skips NaN by default), not only over the first ``r`` rows.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(r, c)``.

    Raises
    ------
    IndexError
        If ``x`` has fewer than ``r`` rows or ``c`` columns.
    """
    block = x.iloc[:r, :c]
    # Slicing silently truncates, so check the shape to keep the
    # out-of-bounds error that per-cell indexing used to raise.
    if block.shape != (r, c):
        raise IndexError(
            "x has shape {}, cannot take {} rows and {} columns".format(x.shape, r, c)
        )

    # One mean per used column, computed exactly as before (Series.mean).
    column_means = np.array([x.iloc[:, j].mean() for j in range(c)], dtype=float)

    # A single broadcast subtraction replaces the per-cell .iloc double loop.
    return block.values.astype(float) - column_means
#2. FORMING COVARIANCE MATRIX
def step2(r, matrix):
    """Return the covariance matrix ``matrix.T . matrix / (r - 1)``.

    Parameters
    ----------
    r : int
        Number of observations (rows of ``matrix``); must be greater than 1.
    matrix : numpy.ndarray
        Mean-adjusted data, one observation per row.

    Returns
    -------
    numpy.ndarray
        Square array with one row and column per column of ``matrix``.
    """
    scaled_transpose = np.dot(1 / (r - 1), matrix.T)
    # Fix: multiply by the `matrix` argument. The original used the global
    # `MeanAdjustedData` here, ignoring what the caller passed in.
    return np.dot(scaled_transpose, matrix)
#3. CALCULATE EIGENVALUES, EIGENVECTORS
def step3_4(r, matrix):
    """Return the two leading eigenvectors of ``matrix`` as an ``r x 2`` array.

    Parameters
    ----------
    r : int
        Number of rows to keep from each eigenvector.
    matrix : array-like
        Square matrix; presumably the real symmetric covariance matrix from
        ``step2`` (complex eigenvectors would lose their imaginary part when
        stored in the float result).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(r, 2)``; column 0 belongs to the largest
        eigenvalue, column 1 to the second largest.

    Raises
    ------
    IndexError
        If fewer than ``r`` rows or fewer than two eigenvectors are available.
    """
    import scipy.linalg as la

    # The decomposition is computed once; the original ran la.eig twice and
    # discarded the first result.
    eigvals, eigvecs = la.eig(np.array(matrix))

    # Reorder the eigenvector columns by descending eigenvalue.
    order = eigvals.argsort()[::-1]
    eigvecs = eigvecs[:, order]

    leading = eigvecs[:r, :2]
    # Slicing truncates silently, so keep the out-of-range error explicit.
    if leading.shape != (r, 2):
        raise IndexError(
            "need at least {} rows and 2 eigenvectors, got {}".format(r, eigvecs.shape)
        )

    feature_vec = np.zeros((r, 2))
    feature_vec[:, :] = leading
    return feature_vec
def step5(matrix1, matrix2):
    """Project the adjusted data onto the feature vectors.

    Computes ``(matrix1.T . matrix2.T).T``: the result has one row per row
    of ``matrix2`` and one column per column of ``matrix1``.
    """
    projected = np.dot(matrix1.T, matrix2.T)
    return projected.T
# Run the PCA steps on the numeric columns selected in `x`.
# Step 1: subtract each column's mean from its values.
MeanAdjustedData = step1(len(df), len(x.columns), x)
# Step 2: covariance matrix of the mean-adjusted data.
mycov = step2(len(df), MeanAdjustedData)
# Steps 3-4: the two leading eigenvectors; the row count passed is the number of columns in `x`.
feature_vec = step3_4(len(x.columns), mycov)
# Step 5: project the mean-adjusted data onto those two eigenvectors.
FinalData = step5(feature_vec, MeanAdjustedData)
# Label the two projected columns for inspection and export.
NewFinal = pd.DataFrame(FinalData, columns=['PC1', 'PC2'])
NewFinal.head()
PC1 PC2
0 -98433.068486 38708.457401
1 -83932.388349 33514.204082
2 -58789.154164 -10352.730902
3 -128318.958216 -45926.546015
4 -1139.086049 31321.239963
NewFinal.to_csv('FinalData_HondaAccord.csv', index=False, header=True)
def data_label(df):
    """Build one hover-text label per row of ``df``.

    Each label has the form
    ``"ID: <index>, Price: <Price>, Year: <Manufactured Year>, Mileage: <NewMil>"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have 'Price', 'Manufactured Year' and 'NewMil' columns with
        NumPy numeric dtypes (each value is converted with ``.astype(str)``).

    Returns
    -------
    list of str
        Labels in row order.
    """
    # Look each column up once and walk the rows in lockstep, instead of
    # re-indexing the frame per row and filling four temporary lists.
    rows = zip(
        df.index.values,
        df['Price'].values,
        df['Manufactured Year'].values,
        df['NewMil'].values,
    )
    return [
        "ID: " + row_id.astype(str)
        + ", " + "Price: " + price.astype(str)
        + ", " + "Year: " + year.astype(str)
        + ", " + "Mileage: " + mileage.astype(str)
        for row_id, price, year, mileage in rows
    ]
mytexts = data_label(df)
mytexts[0]
'ID: 0, Price: 5500.0, Year: 2000.0, Mileage: 37500.0'
#6 PLOTTING PCA SCATTER PLOT
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode, download_plotlyjs, plot 
from plotly.widgets import GraphWidget
# Same figure that pca_plot() builds, but rendered with plot() instead of iplot().
init_notebook_mode()
# One marker per row of FinalData: column 0 on x, column 1 on y, with the
# labels from `mytexts` as hover text.
trace = go.Scatter(x=FinalData[:,0], y=FinalData[:,1], mode="markers", marker = dict(size=10, line = dict(width=1) ,color="pink"), text= mytexts)
mydata = [trace]
# Fixed 1000 x 1000 canvas with titled axes.
mylayout = go.Layout(title='2 Component PCA: ', xaxis=dict(title='component 1'), yaxis=dict(title='component 2'), hovermode = 'closest', autosize=False, width=1000, height=1000)
fig_comp = go.Figure(data=mydata, layout=mylayout)
#iplot(fig_comp)
# The recorded output below shows this call returning a temp-plot.html file URL.
plot(fig_comp)
'file://C:\\Users\\LENOVO\\Documents\\fyp fathiah\\Program\\Test Case\\dataset\\temp-plot.html'
#nbi:hide_in
# Orange button for the boxplot analysis.
boxplotbtn = widgets.Button(
    description='Boxplot',
    tooltip='Click me',
    icon='check',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    style=ButtonStyle(button_color='orange'),
    disabled=False,
)
#nbi:hide_in
# Multi-select list of attributes for the boxplot; 'Price' is preselected.
boxplot_filter = widgets.SelectMultiple(
    description='Filter by',
    options=['Price', 'Year', 'Mileage'],
    value=['Price'],
    rows=3,
    disabled=False,
)
#nbi:hide_in
# Red button for the result panel.
scambtn = widgets.Button(
    description='Click Me',
    tooltip='Click me',
    icon='check',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    style=ButtonStyle(button_color='red'),
    disabled=False,
)
#scambtn
#nbi:hide_in
#filepath=fpath.value + "\\" + fname.value + ".csv"
#filepath
#nbi:hide_in
#accordion.children[0].value
#nbi:hide_in
#import dataframe
import pandas as pd
import requests as req
#calling package URL lib
from urllib.request import urlopen as uReq
#pase HTML text
from bs4 import BeautifulSoup as soup


#filename = "df_mulpages.csv"
#filename = filepath
def scrapme(meurl, filepath):
    """Scrape page 1 of a fixed mudah.my listing URL and save the rows as CSV.

    Parameters
    ----------
    meurl : str
        Currently unused: the listing URL is hard-coded below
        (Kuala Lumpur / Toyota Vios).
    filepath : str
        Destination of the CSV file.

    The CSV has the columns Name, Price, Manufactured Year, Mileage, NewMil,
    CC, Condition and Link. NewMil is the midpoint of the advertised mileage
    band; Price has the "RM" prefix and any whitespace removed.
    """
    filename = filepath

    def _spaced(number):
        # 4999 -> "4 999": thousands are separated by a space in the band labels.
        return format(number, ',').replace(',', ' ')

    # Band label -> midpoint, e.g. "10 000 - 14 999" -> "12500".
    # Bands are 5 000 wide below 100 000, 10 000 wide below 200 000 and
    # 50 000 wide below 500 000. This replaces a 36-branch if/elif chain and
    # fixes its entry for "5 000 - 9 999", which returned "52500" instead of "7500".
    mileage_midpoints = {}
    for start, stop, width in ((0, 100000, 5000),
                               (100000, 200000, 10000),
                               (200000, 500000, 50000)):
        for low in range(start, stop, width):
            label = _spaced(low) + " - " + _spaced(low + width - 1)
            mileage_midpoints[label] = str(low + width // 2)

    def subs(Mileage):
        # Labels outside the table are returned unchanged.
        return mileage_midpoints.get(Mileage, Mileage)

    def dprice(a):
        # Remove all whitespace, but only when the text contains a plain space.
        return ''.join(a.split()) if ' ' in a else a

    container = []
    # Only page 1 is requested: range(1, 2) yields a single page number.
    pages = [
        'https://www.mudah.my/kuala-lumpur/cars-for-sale/toyota/vios?o=' + str(i) + '&q=&so=1&f=p&th=1'
        for i in range(1, 2)
    ]

    for item in pages:
        page = req.get(item)
        page_soup = soup(page.text, "html.parser")
        containers = page_soup.findAll("div", {"class": "listing_params_container"})

        for listing in containers:
            anchor = listing.div.div.a
            clink = anchor["href"]
            name = anchor["title"].strip()
            Price = listing.findAll("div", {"class": "ads_price"})[0].text.strip()

            # The icon labels are read by position: condition, year, mileage, CC.
            labels = listing.findAll("font", {"class": "icon_label"})
            Condition = labels[0].text.strip()
            Year = labels[1].text.strip()
            Mileage = labels[2].text.strip()
            CC = labels[3].text.strip()

            newMil = subs(Mileage).strip()
            nPrice = dprice(Price.replace("RM", "", 1))
            container.append((name, nPrice, Year, Mileage, newMil, CC, Condition, clink))

    df = pd.DataFrame(container, columns=['Name', 'Price', 'Manufactured Year', 'Mileage', 'NewMil', 'CC', 'Condition', 'Link'])
    df.to_csv(filename, index=False, encoding='utf-8')
    print("Done scrap " + filename)
     
#nbi:hide_in
def scrap(mycar, mylocation, mypage, filepath):
    """Scrape up to ``mypage`` listing pages from mudah.my and save them as CSV.

    Parameters
    ----------
    mycar : str
        Car name; turned into a URL fragment by the module-level ``car()``.
    mylocation : str
        Location name; turned into a URL fragment by the module-level
        ``location()``.
    mypage : str or int
        Number of result pages to scrape.
    filepath : str
        Destination of the CSV file.

    Page 1 is fetched first to read the maximum page number from the listing
    title. If ``mypage`` exceeds it, a message is printed and nothing is saved.
    The CSV has the columns Name, Price, Manufactured Year, Mileage, NewMil,
    CC, Condition and Link. NewMil is the midpoint of the advertised mileage
    band; Price has the "RM" prefix and any whitespace removed.
    """
    filename = filepath

    def _spaced(number):
        # 4999 -> "4 999": thousands are separated by a space in the band labels.
        return format(number, ',').replace(',', ' ')

    # Band label -> midpoint, e.g. "10 000 - 14 999" -> "12500".
    # Bands are 5 000 wide below 100 000, 10 000 wide below 200 000 and
    # 50 000 wide below 500 000. This replaces a 36-branch if/elif chain and
    # fixes its entry for "5 000 - 9 999", which returned "52500" instead of "7500".
    mileage_midpoints = {}
    for start, stop, width in ((0, 100000, 5000),
                               (100000, 200000, 10000),
                               (200000, 500000, 50000)):
        for low in range(start, stop, width):
            label = _spaced(low) + " - " + _spaced(low + width - 1)
            mileage_midpoints[label] = str(low + width // 2)

    def subs(Mileage):
        # Labels outside the table are returned unchanged.
        return mileage_midpoints.get(Mileage, Mileage)

    def dprice(a):
        # Remove all whitespace, but only when the text contains a plain space.
        return ''.join(a.split()) if ' ' in a else a

    container = []
    pages = []

    #Parameter1: LOCATION
    param1 = location(mylocation)
    car_sale = 'cars-for-sale' + '/'
    #Parameter2: TYPE OF CAR
    param2 = car(mycar)
    #Parameter3: pages
    n = int(mypage)
    user_seller = '&f=p'

    front_link = 'https://www.mudah.my/' + param1 + car_sale + param2 + 'o='
    end_link = '&q' + '&so=1' + user_seller + '&th=1'

    test_link = front_link + '1' + end_link

    # The context manager closes the connection even if read() raises; the
    # original only closed it on the success path.
    with uReq(test_link) as uClient:
        page_html = uClient.read()

    # The last word of the listing title is parsed as the maximum page number.
    num_page_soup = soup(page_html, "html.parser")
    num_containers = num_page_soup.findAll("div", {"class": "listing_title"})
    str_pg = num_containers[0].h1.text
    max_pg = int(str_pg.split(' ')[-1])
    print('Max pg:' , max_pg, 'Chosen pages:', n)

    if n > max_pg:
        print('Unable to scrap, the chosen number of pages (', n,  ') exceeded the maximum page(s) available which is' , max_pg)
        return

    print('if', n, max_pg)
    for i in range(1, n + 1):
        print('first for loop')
        my_url = front_link + str(i) + end_link
        print('My_url:' + my_url)
        pages.append(my_url)

    for item in pages:
        page = req.get(item)
        page_soup = soup(page.text, "html.parser")
        containers = page_soup.findAll("div", {"class": "listing_params_container"})

        for listing in containers:
            anchor = listing.div.div.a
            clink = anchor["href"]
            name = anchor["title"].strip()
            Price = listing.findAll("div", {"class": "ads_price"})[0].text.strip()

            # The icon labels are read by position: condition, year, mileage, CC.
            labels = listing.findAll("font", {"class": "icon_label"})
            Condition = labels[0].text.strip()
            Year = labels[1].text.strip()
            Mileage = labels[2].text.strip()
            CC = labels[3].text.strip()

            newMil = subs(Mileage).strip()
            nPrice = dprice(Price.replace("RM", "", 1))
            container.append((name, nPrice, Year, Mileage, newMil, CC, Condition, clink))

    df = pd.DataFrame(container, columns=['Name', 'Price', 'Manufactured Year', 'Mileage', 'NewMil', 'CC', 'Condition', 'Link'])
    df.to_csv(filename, index=False, encoding='utf-8')
    print("Done scrap " + filename)
#def scrap(car, location, page, filepath)
scrap(car_dropdown.value, location_dropdown.value, pages_dropdown.value, 'C:\\Users\\LENOVO\\Documents\\fyp fathiah\\Program\\Test1.csv')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-b1da1d6d0e28> in <module>()
      1 #def scrap(car, location, page, filepath)
----> 2 scrap(car_dropdown.value, location_dropdown.value, pages_dropdown.value, 'C:\\Users\\LENOVO\\Documents\\fyp fathiah\\Program\\Test1.csv')

<ipython-input-37-1e3b424683b6> in scrap(mycar, mylocation, mypage, filepath)
     88 
     89     #Parameter1: LOCATION
---> 90     param1 = location(mylocation)
     91     car_sale = 'cars-for-sale' + '/'
     92     #Parameter2: TYPE OF CAR

NameError: name 'location' is not defined
car(car_dropdown.value)
location(location_dropdown.value)
#nbi:hide_in
def car(car):
    """Turn a car name into a lower-case URL fragment.

    Spaces become "/" and a trailing "?" is appended,
    e.g. "Toyota Vios" -> "toyota/vios?".
    """
    return (car.replace(" ", "/") + "?").lower()
#nbi:hide_in
def location(location):
    """Turn a location name into a lower-case URL fragment ending in "/".

    Spaces become "-", e.g. "Kuala Lumpur" -> "kuala-lumpur/". Prints 'y'
    when the name contains a space and 'n' otherwise.
    """
    has_space = ' ' in location
    if has_space:
        slug = location.replace(" ", "-")
    else:
        slug = location
    print('y' if has_space else 'n')
    return (slug + "/").lower()
#nbi:hide_in

# Orange button that starts the scrape. Fix: the label read 'Srap & Save'.
bscrap = widgets.Button(style=ButtonStyle(button_color='orange'), description='Scrap & Save')
# Output area that receives the handler's messages.
oscrap = widgets.Output()


def on_button_clicked(_):
    """Clear ``oscrap`` and print a confirmation message into it.

    NOTE: the scraper call itself is still commented out, so a click only
    prints the message.
    """
    with oscrap:
        clear_output()
        #scrapme(meurl.value, filepath)
        print('Me Scrapped!')


# Register the handler on the scrape button.
bscrap.on_click(on_button_clicked)
# displaying button and its output together
#widgets.VBox([bscrap,oscrap])
#nbi:hide_in
#print("This is header")
header
#nbi:hide_in
# Banner image, referenced through a relative <img> path.
gmbr_kereta = widgets.HTML(
    description='',
    placeholder='',
    value='<img src="highway car1_trademark_scamtrust.png">',
)
# Bare expression: shows the widget when this is the last line of a notebook cell.
gmbr_kereta
#nbi:hide_in
#accordion1 = widgets.Accordion(children=[widgets.VBox([str_link, meurl]), widgets.VBox([str_folder, fpath, str_csv, fname]), widgets.HBox([scrap_dropdown, widgets.VBox([bscrap,oscrap])])])
# Panel 1: scrape parameters, then the save location with the scrape button.
accordion1 = widgets.Accordion(children=[
    scrap_param,
    widgets.VBox([
        str_folder,
        fpath,
        str_csv,
        fname,
        widgets.VBox([bscrap, oscrap]),
    ]),
])
accordion1.set_title(0, 'Extract')
accordion1.set_title(1, 'Save')
accordion1
#nbi:hide_in
# Boxplot button next to its attribute filter.
analysisbtn = widgets.HBox([boxplotbtn, boxplot_filter])
#nbi:hide_in
# Panel 2: location of the cleaned dataset, then the preview controls.
accordion2 = widgets.Accordion(children=[
    widgets.VBox([str_folder, lpath, str_csv, lname]),
    view_by_header,
])
accordion2.set_title(0, 'Upload')
accordion2.set_title(1, 'View')
accordion2
#nbi:hide_in
# Panel 3: PCA plot, boxplot analysis and the result button.
accordion3 = widgets.Accordion(children=[
    widgets.VBox([str_pca, widgets.VBox([pca_output, pcabtn])]),
    widgets.VBox([str_box, analysisbtn]),
    widgets.VBox([str_result, scambtn]),
])
accordion3.set_title(0, 'Method')
accordion3.set_title(1, 'Analysis')
accordion3.set_title(2, 'Result')
accordion3
#nbi:hide_in
# Top-level layout: one tab per workflow stage, each holding an accordion.
tab_nest = widgets.Tab()
tab_nest.children = [accordion1, accordion2, accordion3]
# Fix: the first tab title read '1. Extaction'.
tab_nest.set_title(0, '1. Extraction')
tab_nest.set_title(1, '2. Data')
tab_nest.set_title(2, '3. Evaluation')
print('Tab Nest')
display(tab_nest)
Tab Nest